{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "import statsmodels.formula.api as smf\n",
    "from statsmodels.stats.proportion import proportions_ztest\n",
    "import csop_helper as csop\n",
    "\n",
    "from IPython.display import display, display_markdown\n",
    "import pickle\n",
    "\n",
    "#Feel free to un-silence warnings by commenting out the lines below\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "true"
   },
   "source": [
    "# Overview \n",
    "This file runs the regressions referred to in the main text and supplemental information. The helper functions are: \n",
    "* pairwise_\\[form of analysis\\](): these functions return dictionaries of fitted models, one for each pairwise analysis \n",
    "* display_summaries() takes a dictionary generated by the pairwise functions (except OLS), and outputs summaries of each model in that pairwise analysis. See examples of usage throughout the notebook. \n",
    "\n",
    "\n",
    "Functions whose names begin with `process_results_` summarize the dictionaries generated by the corresponding pairwise functions as pandas dataframes -- this is used to format the results for exporting to Excel. \n",
    "\n",
    "Code for pickling and exporting the results is included at the end of the notebook. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Helper Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "def pairwise_config_analysis_complexity_interaction(data, dep_var_colname, drop_na):\n",
    "    \"\"\"Fit one mixed model per nominal-group configuration, including a complexity interaction.\n",
    "\n",
    "    For every nominal-group configuration, the rows of `data` whose `group_formation` is either\n",
    "    'real-group' or that configuration are selected, and `dep_var_colname` is regressed on\n",
    "    group formation (reference level: the configuration) crossed with `complexity_cat`\n",
    "    (reference level: 'Very low'), using `game_id` as the grouping variable.\n",
    "\n",
    "    Parameters:\n",
    "        data: DataFrame holding `group_formation`, `complexity_cat`, `game_id` and the dependent variable.\n",
    "        dep_var_colname: name of the dependent-variable column.\n",
    "        drop_na: when truthy, rows with a missing dependent variable are removed before fitting.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each configuration name to its fitted model.\n",
    "    \"\"\"\n",
    "    formula_template = \"{} ~ C(group_formation, Treatment(reference='{}')) * C(complexity_cat, Treatment(reference='Very low'))\"\n",
    "    row_filter = \"group_formation == 'real-group' or group_formation == '{}'\"\n",
    "    fitted_by_config = {}\n",
    "    for config in (\"nominal-group-random\", \"nominal-group-duration\", \"nominal-group-efficiency\", \"nominal-group-score\"):\n",
    "        print(\"Dependent variable: {} -- Comparing real-groups to {}\".format(dep_var_colname, config))\n",
    "        pair_df = data.query(row_filter.format(config))\n",
    "        if drop_na:\n",
    "            pair_df = pair_df.dropna(subset=[dep_var_colname])\n",
    "        mixed_model = smf.mixedlm(formula_template.format(dep_var_colname, config),\n",
    "                                  data=pair_df,\n",
    "                                  groups=pair_df['game_id'])\n",
    "        fitted_by_config[config] = mixed_model.fit()\n",
    "\n",
    "    return fitted_by_config\n",
    "\n",
    "\n",
    "def pairwise_config_analysis(data, dep_var_colname):\n",
    "    \"\"\"Fit one mixed model per nominal-group configuration for the average real-group effect.\n",
    "\n",
    "    For every nominal-group configuration, the rows of `data` whose `group_formation` is either\n",
    "    'real-group' or that configuration are selected, and `dep_var_colname` is regressed on\n",
    "    group formation (reference level: the configuration) with `game_id` as the grouping variable.\n",
    "\n",
    "    Parameters:\n",
    "        data: DataFrame holding `group_formation`, `game_id` and the dependent variable.\n",
    "        dep_var_colname: name of the dependent-variable column.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each configuration name to its fitted model.\n",
    "    \"\"\"\n",
    "    formula_template = \"{} ~ C(group_formation, Treatment(reference='{}'))\"\n",
    "    row_filter = \"group_formation == 'real-group' or group_formation == '{}'\"\n",
    "    fitted_by_config = {}\n",
    "    for config in (\"nominal-group-random\", \"nominal-group-duration\", \"nominal-group-efficiency\", \"nominal-group-score\"):\n",
    "        print(\"Dependent variable: {} -- Comparing real-groups to {}\".format(dep_var_colname, config))\n",
    "        pair_df = data.query(row_filter.format(config))\n",
    "        mixed_model = smf.mixedlm(formula_template.format(dep_var_colname, config),\n",
    "                                  data=pair_df,\n",
    "                                  groups=pair_df['game_id'])\n",
    "        fitted_by_config[config] = mixed_model.fit()\n",
    "\n",
    "    return fitted_by_config\n",
    "\n",
    "\n",
    "def pairwise_OLS(data, dep_var_colname):\n",
    "    \"\"\"Fit a separate OLS model for every (nominal-group configuration, complexity level) pair.\n",
    "\n",
    "    Within each configuration, the rows of `data` whose `group_formation` is either 'real-group'\n",
    "    or that configuration are split by `complexity_cat`, and `dep_var_colname` is regressed on\n",
    "    group formation (reference level: the configuration) within each complexity level.\n",
    "\n",
    "    Parameters:\n",
    "        data: DataFrame holding `group_formation`, `complexity_cat` and the dependent variable.\n",
    "        dep_var_colname: name of the dependent-variable column.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping configuration name -> dict mapping complexity level -> fitted OLS model.\n",
    "    \"\"\"\n",
    "    formula_template = \"{} ~ C(group_formation, Treatment(reference='{}'))\"\n",
    "    fitted_by_config = {}\n",
    "    for config in (\"nominal-group-random\", \"nominal-group-duration\", \"nominal-group-efficiency\", \"nominal-group-score\"):\n",
    "        print(\"Dependent variable: {} -- Comparing real-groups to {}\".format(dep_var_colname, config))\n",
    "        pair_df = data.query(\"group_formation == 'real-group' or group_formation == '{}'\".format(config))\n",
    "        fitted_by_complexity = {}\n",
    "        for complexity in (\"Very low\", \"Low\", \"Moderate\", \"High\", \"Very high\"):\n",
    "            # `@complexity` resolves to the loop variable inside DataFrame.query\n",
    "            level_df = pair_df.query(\"complexity_cat == @complexity\")\n",
    "            ols_model = smf.ols(formula_template.format(dep_var_colname, config), data=level_df)\n",
    "            fitted_by_complexity[complexity] = ols_model.fit()\n",
    "        fitted_by_config[config] = fitted_by_complexity\n",
    "\n",
    "    return fitted_by_config\n",
    "\n",
    "\n",
    "def display_summaries(summary_dict):\n",
    "    \"\"\"Render each model's summary under a level-1 markdown heading named after its dictionary key.\n",
    "\n",
    "    Parameters:\n",
    "        summary_dict: dict mapping a label to an object exposing `summary()`.\n",
    "    \"\"\"\n",
    "    for config in summary_dict:\n",
    "        heading = \"# {}\".format(config)\n",
    "        display_markdown(heading, raw=True)\n",
    "        display(summary_dict[config].summary())\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "def process_results_int(models):\n",
    "    \"\"\"Summarize the interaction models as two export-ready DataFrames.\n",
    "\n",
    "    Parameters:\n",
    "        models: dict mapping a control-condition key (e.g. 'nominal-group-score') to a fitted model\n",
    "            exposing `summary()` (with `tables[0]` = model info and `tables[1]` = coefficient table),\n",
    "            `t_test()` and `model.exog_names`.\n",
    "\n",
    "    Returns:\n",
    "        (coefficients, simple_effects): two DataFrames with one row per control condition.\n",
    "        `coefficients` holds the real-group coefficient (\"B\") and its interaction with each\n",
    "        non-reference complexity level; `simple_effects` holds, per complexity level, the t-test\n",
    "        of (real-group coefficient + the matching interaction coefficient).\n",
    "    \"\"\"\n",
    "    analysis_results = []\n",
    "    ttest_results = []\n",
    "    treatment_labels = {'nominal-group-random':'Random',\n",
    "                        'nominal-group-score':'Highest scoring',\n",
    "                        'nominal-group-duration':'Fastest',\n",
    "                        'nominal-group-efficiency':'Most efficient'}\n",
    "    coeff_labels = [\"B\", \"B * Low\", \"B * Moderate\", \"B * High\", \"B * Very high\"]\n",
    "    complexities = [\"Low\", \"Moderate\", \"High\", \"Very high\"]\n",
    "\n",
    "    for treatment, model in models.items():\n",
    "        result_summary = model.summary()\n",
    "        # tables[0].iloc[1,1] is exported as \"n\" -- presumably the number of observations; verify against the summary layout\n",
    "        treatment_dict = {\"Control condition\":treatment_labels[treatment],\n",
    "                          \"n\":result_summary.tables[0].iloc[1,1]}\n",
    "\n",
    "        # Parameter names of the real-group main effect and of its interaction with each complexity level\n",
    "        main_effect = \"C(group_formation, Treatment(reference='{}'))[T.real-group]\".format(treatment)\n",
    "        interactions = [main_effect + \":C(complexity_cat, Treatment(reference='Very low'))[T.{}]\".format(level) for level in complexities]\n",
    "\n",
    "        # Drop the last row of the coefficient table; copy so helper columns are not written into a slice\n",
    "        coeff_table = result_summary.tables[1].iloc[:-1].copy()\n",
    "        coeff_pvalues = coeff_table['P>|z|'].astype(float).values\n",
    "        coeff_table['asterisks'] = [\" ***\" if x <= 0.001 else \" **\" if x <= 0.01 else \" *\" if x <= 0.05 else \"\" for x in coeff_pvalues]\n",
    "        coeff_table['pvals'] = [\"< 0.001\" if x <= 0.001 else str(x) for x in coeff_pvalues]\n",
    "        coeff_table['pval_symbol'] = [\" \" if x[0] == \"<\" else \" = \" for x in coeff_table['pvals']]\n",
    "        coefficients = coeff_table['Coef.'].astype(str) + coeff_table['asterisks'] + \" SE = \" + coeff_table['Std.Err.'].astype(str) + \" P\" + coeff_table['pval_symbol'] + coeff_table['pvals']\n",
    "        coefficients = coefficients[[main_effect] + interactions]\n",
    "\n",
    "        treatment_dict.update(dict(zip(coeff_labels, coefficients.values)))\n",
    "        analysis_results.append(treatment_dict)\n",
    "\n",
    "        # t-test processing: one contrast per complexity level, main effect + matching interaction.\n",
    "        # Columns are located by parameter name so each contrast matches its label regardless of\n",
    "        # the order in which the complexity levels were encoded in the design matrix.\n",
    "        ttest_dict = {\"Control condition\":treatment_labels[treatment]}\n",
    "        fe_names = list(model.model.exog_names)\n",
    "        main_position = fe_names.index(main_effect)\n",
    "        tests = np.zeros((len(interactions), len(fe_names)), dtype=int)\n",
    "        for row, interaction in enumerate(interactions):\n",
    "            tests[row, main_position] = 1\n",
    "            tests[row, fe_names.index(interaction)] = 1\n",
    "\n",
    "        t_test_results = model.t_test(tests).summary_frame()\n",
    "        test_pvalues = t_test_results['P>|z|'].astype(float)\n",
    "        t_test_results['asterisks'] = [\" ***\" if x <= 0.001 else \" **\" if x <= 0.01 else \" *\" if x <= 0.05 else \"\" for x in test_pvalues.values]\n",
    "        # Formatted p-values live in their own column so the numeric column never receives strings\n",
    "        t_test_results['pvals'] = [\"< 0.001\" if x == 0 else str(x) for x in test_pvalues.round(3).values]\n",
    "        t_test_results['pval_symbol'] = [\" \" if x[0] == \"<\" else \" = \" for x in t_test_results['pvals']]\n",
    "        t_test_results['summary'] = t_test_results['coef'].round(3).astype(str) + t_test_results['asterisks'] + \" SE = \" + t_test_results['std err'].round(3).astype(str) + \" P\" + t_test_results['pval_symbol'] + t_test_results['pvals']\n",
    "\n",
    "        ttest_dict.update(dict(zip(complexities, t_test_results['summary'].values)))\n",
    "        ttest_results.append(ttest_dict)\n",
    "\n",
    "    return pd.DataFrame(analysis_results), pd.DataFrame(ttest_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "def process_results_avg(models):\n",
    "    \"\"\"Summarize the average-effect models as one export-ready DataFrame.\n",
    "\n",
    "    Parameters:\n",
    "        models: dict mapping a control-condition key (e.g. 'nominal-group-score') to a fitted model\n",
    "            whose `summary()` exposes `tables[0]` (model info) and `tables[1]` (coefficient table).\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with one row per control condition: its label, the value at `tables[0].iloc[1,1]`\n",
    "        as \"n\", and the formatted real-group coefficient in a column named after the value at\n",
    "        `tables[0].iloc[0,3]` (presumably the dependent-variable name; verify against the summary layout).\n",
    "    \"\"\"\n",
    "    analysis_results = []\n",
    "    treatment_labels = {'nominal-group-random':'Random',\n",
    "                        'nominal-group-score':'Highest scoring',\n",
    "                        'nominal-group-duration':'Fastest',\n",
    "                        'nominal-group-efficiency':'Most efficient'}\n",
    "\n",
    "    for treatment, model in models.items():\n",
    "        result_summary = model.summary()\n",
    "        treatment_dict = {\"Control condition\":treatment_labels[treatment],\n",
    "                          \"n\":result_summary.tables[0].iloc[1,1]}\n",
    "\n",
    "        coeff_labels = [result_summary.tables[0].iloc[0,3]]\n",
    "\n",
    "        # Drop the last row of the coefficient table; copy so helper columns are not written into a slice\n",
    "        coeff_table = result_summary.tables[1].iloc[:-1].copy()\n",
    "        coeff_pvalues = coeff_table['P>|z|'].astype(float).values\n",
    "        coeff_table['asterisks'] = [\" ***\" if x <= 0.001 else \" **\" if x <= 0.01 else \" *\" if x <= 0.05 else \"\" for x in coeff_pvalues]\n",
    "        coeff_table['pvals'] = [\"< 0.001\" if x <= 0.001 else str(x) for x in coeff_pvalues]\n",
    "        coeff_table['pval_symbol'] = [\" \" if str(x)[0] == \"<\" else \" = \" for x in coeff_table['pvals']]\n",
    "        coefficients = coeff_table['Coef.'].astype(str) + coeff_table['asterisks'] + \" SE = \" + coeff_table['Std.Err.'].astype(str) + \" P\" + coeff_table['pval_symbol'] + coeff_table['pvals']\n",
    "\n",
    "        # Keep only the real-group coefficient\n",
    "        coefficients = coefficients[[\"C(group_formation, Treatment(reference='{}'))[T.real-group]\".format(treatment)]]\n",
    "\n",
    "        treatment_dict.update(dict(zip(coeff_labels, coefficients.values)))\n",
    "        analysis_results.append(treatment_dict)\n",
    "\n",
    "    return pd.DataFrame(analysis_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "def process_results_ols(models):\n",
    "    \"\"\"Summarize the per-complexity OLS models as one export-ready DataFrame.\n",
    "\n",
    "    Parameters:\n",
    "        models: dict mapping a control-condition key (e.g. 'nominal-group-score') to a dict that maps\n",
    "            each complexity level to a fitted model exposing `nobs`, `params`, `bse` and `pvalues`.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with one row per control condition: its label, \"n\", and one formatted\n",
    "        \"coefficient asterisks SE = ... P ...\" string per complexity level, built from the last\n",
    "        entry of `params`, `bse` and `pvalues`.\n",
    "    \"\"\"\n",
    "    analysis_results = []\n",
    "    treatment_labels = {'nominal-group-random':'Random',\n",
    "                        'nominal-group-score':'Highest scoring',\n",
    "                        'nominal-group-duration':'Fastest',\n",
    "                        'nominal-group-efficiency':'Most efficient'}\n",
    "\n",
    "    for treatment, ols_models in models.items():\n",
    "        treatment_dict = {\"Control condition\":treatment_labels[treatment]}\n",
    "        for complexity in ['Very low', \"Low\", \"Moderate\", \"High\", \"Very high\"]:\n",
    "            model = ols_models[complexity]\n",
    "            n = int(model.nobs)\n",
    "            # .iloc[-1] selects the last coefficient by position; a bare [-1] on a\n",
    "            # string-labelled Series relies on deprecated positional fallback.\n",
    "            raw_pval = model.pvalues.iloc[-1]\n",
    "            coefficient = np.round(model.params.iloc[-1],3)\n",
    "            se = np.round(model.bse.iloc[-1],3)\n",
    "            pval = np.round(raw_pval,3)\n",
    "\n",
    "            # Significance stars use the unrounded p-value\n",
    "            if raw_pval <= 0.001:\n",
    "                asterisk = \"***\"\n",
    "            elif raw_pval <= 0.01:\n",
    "                asterisk = \"**\"\n",
    "            elif raw_pval <= 0.05:\n",
    "                asterisk = \"*\"\n",
    "            else:\n",
    "                asterisk = \"\"\n",
    "\n",
    "            # A p-value that rounds to zero is reported as an inequality\n",
    "            if pval == 0:\n",
    "                pval = \"< 0.001\"\n",
    "\n",
    "            if str(pval)[0] == \"<\":\n",
    "                pval_sign = \" \"\n",
    "            else:\n",
    "                pval_sign = \" = \"\n",
    "\n",
    "            # NOTE: \"n\" is overwritten on every iteration, so the exported value is the\n",
    "            # observation count of the last complexity level processed.\n",
    "            treatment_dict.update({\"n\":n,\n",
    "                                   complexity:\"{} {} SE = {} P{}{}\".format(coefficient, asterisk, se, pval_sign, pval)})\n",
    "\n",
    "        analysis_results.append(treatment_dict)\n",
    "\n",
    "    return pd.DataFrame(analysis_results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "df = pd.read_pickle(\"./rounds_data_phase2_processed.pkl\").query(\"group_formation != 'solo'\").reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analyses"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 1. Score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model1_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_score\", False)\n",
    "model1_avg = pairwise_config_analysis(df, \"zscore_score\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model1_ols = pairwise_OLS(df, \"zscore_score\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model1_int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model1_avg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 2. Duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model2_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_round_duration\", False)\n",
    "model2_avg = pairwise_config_analysis(df, \"zscore_round_duration\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model2_ols = pairwise_OLS(df, \"zscore_round_duration\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 3. Efficiency"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model3_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_efficiency\", False)\n",
    "model3_avg = pairwise_config_analysis(df, \"zscore_efficiency\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model3_ols = pairwise_OLS(df, \"zscore_efficiency\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 4. Time to first solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model4_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_time_to_first_step\", False)\n",
    "model4_avg = pairwise_config_analysis(df, \"zscore_time_to_first_step\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 5. Time to best solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model5_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_time_from_first_step_to_best\", False)\n",
    "model5_avg = pairwise_config_analysis(df, \"zscore_time_from_first_step_to_best\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model5_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 6. Time to final solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model6_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_time_from_best_to_final\", False)\n",
    "model6_avg = pairwise_config_analysis(df, \"zscore_time_from_best_to_final\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 7. Time to submission "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model7_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_time_from_final_to_submit\", False)\n",
    "model7_avg = pairwise_config_analysis(df, \"zscore_time_from_final_to_submit\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 8. Number of intermediate solutions "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model8_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_ROUNDFEAT_SOLNS_num_inter_soln\", False)\n",
    "model8_avg = pairwise_config_analysis(df, \"zscore_ROUNDFEAT_SOLNS_num_inter_soln\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model8_avg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 9. Intermediate solutions / min"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model9_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_intermediate_solution_pace\", False)\n",
    "model9_avg = pairwise_config_analysis(df, \"zscore_intermediate_solution_pace\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model9_avg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 10. Exploration radius "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model10_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_max_soln_dist_fillna\", False)\n",
    "model10_avg = pairwise_config_analysis(df, \"zscore_max_soln_dist_fillna\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model10_avg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## 11. Best solution score "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "model11_int = pairwise_config_analysis_complexity_interaction(df, \"zscore_normalized_best_score\", False)\n",
    "model11_avg = pairwise_config_analysis(df, \"zscore_normalized_best_score\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "display_summaries(model11_avg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## Was best solution submitted? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "df.query(\"group_formation == 'real-group' or group_formation == 'nominal-group-score'\").groupby(\"group_formation\")['ROUNDFEAT_SCORES_bool_submitted_highest_complete_score'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "submitted_best_summary = df.query(\"group_formation == 'real-group' or group_formation == 'nominal-group-score'\").groupby(\"group_formation\")['ROUNDFEAT_SCORES_bool_submitted_highest_complete_score'].agg(['sum', 'count'])\n",
    "\n",
    "proportions_ztest(submitted_best_summary['sum'],\n",
    "                  submitted_best_summary['count'],\n",
    "                  alternative=\"two-sided\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Format and export results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "results_int = [model1_int, model2_int, model3_int, model4_int, model5_int, model6_int, model7_int, model8_int, model9_int, model10_int, model11_int]\n",
    "results_avg = [model1_avg, model2_avg, model3_avg, model4_avg, model5_avg, model6_avg, model7_avg, model8_avg, model9_avg, model10_avg, model11_avg]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": "false"
   },
   "source": [
    "## Write all results to excel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "with pd.ExcelWriter(\"./analysis_results.xlsx\", engine='xlsxwriter') as xlsx_writer:\n",
    "    # One \"<dependent variable>_int\" sheet per interaction model: coefficients at the top, t-tests from row 6.\n",
    "    for index, model in enumerate(results_int):\n",
    "        try:\n",
    "            colname = model['nominal-group-score'].summary().tables[0].iloc[0,3].replace(\"zscore_\", \"\")\n",
    "            print(\"Exporting Model {} -- {}\".format(index+1, colname))\n",
    "            interaction, ttests = process_results_int(model)\n",
    "            interaction.to_excel(xlsx_writer, sheet_name=\"{}_int\".format(colname[:25]), index=False)\n",
    "            ttests.to_excel(xlsx_writer, sheet_name=\"{}_int\".format(colname[:25]), startrow = 6, index=False)\n",
    "        except Exception:\n",
    "            # Report which model failed using only names that are always bound here,\n",
    "            # then re-raise so the original error and traceback are preserved.\n",
    "            print(\"Can't export: \", index)\n",
    "            raise\n",
    "            \n",
    "    \n",
    "    # The OLS tables go onto the matching \"_int\" sheets, starting at row 12.\n",
    "    for colname,ols_result in list(zip([\"score\", \"round_duration\", \"efficiency\"], [model1_ols, model2_ols, model3_ols])):\n",
    "        print(\"Writing {} OLS\".format(colname))\n",
    "        process_results_ols(ols_result).to_excel(xlsx_writer, sheet_name=\"{}_int\".format(colname[:25]), startrow = 12, index=False)\n",
    "        \n",
    "    \n",
    "    # Average effects: one column per dependent variable, aligned on (control condition, n).\n",
    "    df_avg_effects = pd.concat([process_results_avg(x).set_index([\"Control condition\", \"n\"]) for x in results_avg], axis=1).reset_index()\n",
    "    df_avg_effects.rename(columns={'zscore_score':\"Score\",\n",
    "                                   'zscore_round_duration':'Round duration',\n",
    "                                   'zscore_efficiency':\"Efficiency\",\n",
    "                                   'zscore_time_to_first_step':'Time to first solution', \n",
    "                                   'zscore_time_from_first_step_to_best':'Time to best solution',\n",
    "                                   'zscore_time_from_best_to_final':'Time to final solution',\n",
    "                                   'zscore_time_from_final_to_submit':'Time to submission',\n",
    "                                   'zscore_ROUNDFEAT_SOLNS_num_inter_soln':'Number of intermediate solutions', \n",
    "                                   'zscore_intermediate_solution_pace':'Intermediate solutions / min',\n",
    "                                   'zscore_max_soln_dist_fillna':'Exploration radius',\n",
    "                                   'zscore_normalized_best_score':'Best-found solution'}, inplace=True)\n",
    "    \n",
    "    df_avg_effects.to_excel(xlsx_writer, sheet_name=\"avg_effects\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# OPTIONAL: Save models \n",
    "This code takes the lists of fitted models from the \"Format and export results\" section and writes them to pickle files. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "with open(\"./interaction_effect_models.pkl\", \"wb\") as file: \n",
    "    pickle.dump(results_int, file)\n",
    "\n",
    "with open(\"./average_effect_models.pkl\", \"wb\") as file: \n",
    "    pickle.dump(results_avg, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "heading_collapsed": "false"
   },
   "outputs": [],
   "source": [
    "#Test saving: reload the average-effect models written by the previous cell and show the first set\n",
    "with open(\"./average_effect_models.pkl\", \"rb\") as file:\n",
    "    test_results = pickle.load(file)\n",
    "display_summaries(test_results[0])\n",
    "del test_results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "csop1",
   "language": "python",
   "name": "csop1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
